* Set current directory and open log
cd "E:\Seagate Sync\VOL\Personal folder\Documents\Research\Current projects\Working party meetings\Posted\EUP 160404"
capture log close
log using "Data management\worg-management01", replace text


**********************************************************
* Cleaning and preparation of working party lifecycle data
**********************************************************


* Project and author information
********************************
* Programme:	worg-management01.do
* Date:			7 February 2015
* Author:		Frank Haege, University of Limerick
* Contact:		frank.haege@ul.ie 

* Do-file description
*********************
* This do-file conducts consistency checks, defines variable names and value labels, 
* generates formatted date variables and a general policy area variable.


* Clear memory  
**************
version 12.0
clear all
macro drop _all
set more off


* Load dataset
**************
insheet using "Input data\wpar-data-lifecycle-150523.csv", comma names case clear
des, s
* 486 working parties

* Check variables
codebook


****************************
* Format and label variables
****************************

* Generate WP birth date variable
generate wpbirth2 = .
replace wpbirth2 = date(wpbirth, "DMY")
format %td wpbirth2
drop wpbirth
rename wpbirth2 wpbirth

* Generate WP death date variable
generate wpdeath2 = .
replace wpdeath2 = date(wpdeath, "DMY")
format %td wpdeath2
drop wpdeath
rename wpdeath2 wpdeath

* Generate running number for working parties
sort wpid
generate wprunno = _n
order wprunno

* Code reason for abolishment variable
replace cod = 0 if cod == .

* Code absent death date for working parties that have not be abolished yet
replace deathdate = 0 if deathdate == . & wpdeath == .

* Label variables
label var wprunno "WP running number"
label var wpid "WP ID"
rename wpsectornew sectornew
label var sectornew "WP sector (post-Seville)"
label var wpname "WP name"
label var lcensor "WP left censored (yes/no)"
label val lcensor yesno
label var succid "WP successor ID"
label var succname "WP successor name"
rename wpbirth birth
label var birth "WP establishment date"
rename wpdeath death
label var death "WP abolishment date"
label var birthdate "WP exact establishement date (yes/no)"
label val birthdate yesno
label var deathdate "WP exact abolishment date (yes/no)"
label val deathdate yesno
rename doc birthdoc
label var birthdoc "WP Council or Eurlex number of birth source document"
label var cod "WP abolishment reason"
label var comment "WP coding comment"
label def codl /*
	*/ 0 "Not abolished" /*
	*/ 1 "Abolished" /*
	*/ 2 "Subsumed" /*
	*/ 3 "Merged" /*
	*/ 4 "Downgraded" /*
	*/ 5 "Reallocated" /*
	*/ 6 "Extended" /*
	*/ 7 "Upgraded" /*
	*/ 8 "Moved to new sector", modify
label val cod codl
tab cod, m

* Order variables
order wprunno wpid sectornew lcensor birth birthdate death deathdate cod wpname

* Reformat working party id variable
replace wpid = regexr(wpid, "\.", "")
replace wpid = regexr(wpid, "\.", "")
replace wpid = regexr(wpid, "\)", "")

* Reformat successor working party id variable
replace succid = regexr(succid, "\.", "")
replace succid = regexr(succid, "\.", "")
replace succid = regexr(succid, "\)", "")


*****************************
* Internal consistency checks
*****************************

* Does each ID occur only once?
tab wpid, m
duplicates list wpid

* Does each working party name occur only once?
tab wpname, m
duplicates list wpname
* These are not errors. Over time, two distinct groups were called WP on Statistics.
* The other two groups were moved from one policy sector into the other over time. 

* Do all left-censored cases miss birth dates?
tab lcensor, m
tab birth, m
tab birth lcensor, m

* Do all working parties that have been incorporated into another working party have a successor ID?
tab cod, m
tab succid cod, m

* Same number of successor IDs and successor names?
tab succid, m
tab succname, m

* Do all left-censored working parties miss an exact birth date?
tab birthdate, m
tab birthdate lcensor, m

* Do all working parties coded as having an exact birth date actually do have a unique date?
tab birth birthdate, m
list wpid wpname birth birthdoc if birth == d(07jul2004) | birth == d(01mar2000)
* Ad hoc working parties happen to be established by Coreper on the same day
* The establishment of the interim PSC and interim Military Committee are connected

* Any missing values for birth document?
tab birthdoc, m

* Do all working parties coded as having an exact death date actually do have a unique date?
tab deathdate, m
tab death deathdate, m
tab cod deathdate, m
tab death, m

* Is coding of ID and sector variable consistent?
tab sectornew, m
tab wpid sectornew, m

* Check consistency of dates
list wprunno wpid wpname birth death if birth >= death & birth != .
list wprunno wpid wpname birth death if (birth < d(26nov1999) | birth > d(15jan2015)) & birth != .
list wprunno wpid wpname birth death if (death < d(26nov1999) | birth > d(15jan2015)) & birth != .

* Check working party ID codes
tab wpid succid, m
* All successor codes also appear as working party codes

* Check consistency of coding of successor working parties
list if succid == "" & succname != ""
list if succid != "" & succname == ""

* Check that abolished groups do not have a successor ID
list wpid wpname succname if succid != "" & cod == 1
* These are all group that had been temporarily abolished


********************************
* Generate policy field variable
********************************

* Generate working party sector variable pre-Seville
generate sectorold = substr(wpid, 1, 1)
label var sectorold "WP sector (pre-Seville)"
tab sectorold, m
order sectorold, before(sectornew)

* Allocate Treaty committees to different policy areas
generate policy = sectorold
label var policy "Policy area"
tab wpid if policy == "A", m
tab wpname if policy == "A", m
tab policy, m
replace policy = "B" if wpid == "A01"
replace policy = "B" if wpid == "A01a"
replace policy = "B" if wpid == "A01b"
replace policy = "F" if wpid == "A02"
replace policy = "P" if wpid == "A03"
replace policy = "C" if wpid == "A04"
replace policy = "C" if wpid == "A04a"
replace policy = "C" if wpid == "A04b"
replace policy = "C" if wpid == "A04c"
replace policy = "C" if wpid == "A04d"
replace policy = "C" if wpid == "A04e"
replace policy = "C" if wpid == "A04f"
replace policy = "C" if wpid == "A04g"
replace policy = "C" if wpid == "A04h"
replace policy = "C" if wpid == "A04i"
replace policy = "C" if wpid == "A05"
replace policy = "G" if wpid == "A06"
replace policy = "H" if wpid == "A07"
replace policy = "B" if wpid == "A08"
replace policy = "B" if wpid == "A09"
replace policy = "B" if wpid == "A10"
replace policy = "B" if wpid == "A10a"
replace policy = "B" if wpid == "A10b"
replace policy = "B" if wpid == "A10c"
replace policy = "B" if wpid == "A10d"
replace policy = "B" if wpid == "A10e"
replace policy = "B" if wpid == "A11"
replace policy = "B" if wpid == "A12"
replace policy = "B" if wpid == "A12a"
replace policy = "B" if wpid == "A12b"
replace policy = "B" if wpid == "A12c"
replace policy = "B" if wpid == "A12d"
replace policy = "F" if wpid == "A13"
replace policy = "F" if wpid == "A14"
replace policy = "G" if wpid == "A15"

* Merge Information Society with Telecommunications
tab wpname if policy == "M"
replace policy = "L" if policy == "M"

* Merge Industry with Internal Market
tab wpname if policy == "J"
replace policy = "K" if policy == "J"

* Generate numerical policy variable
tab policy, m
generate policystr = policy
drop policy
generate policy = .
replace policy = 1 if policystr == "B"
replace policy = 2 if policystr == "C"
replace policy = 3 if policystr == "D"
replace policy = 4 if policystr == "E"
replace policy = 5 if policystr == "F"
replace policy = 6 if policystr == "G"
replace policy = 7 if policystr == "H"
replace policy = 8 if policystr == "I"
replace policy = 9 if policystr == "K"
replace policy = 10 if policystr == "L" 
replace policy = 11 if policystr == "N"
replace policy = 12 if policystr == "O"
replace policy = 13 if policystr == "P"
replace policy = 14 if policystr == "Q"
replace policy = 15 if policystr == "R"
replace policy = 16 if policystr == "S"
replace policy = 17 if policystr == "T"
tab policy, m
tab policystr, m
drop policystr
label var policy "WP policy area"
order policy, before(sectorold)

* Label numerical policy variable
label def policyl /*
	*/ 1 "General Affairs" /*
	*/ 2 "Foreign Affairs" /*
	*/ 3 "Development" /*
	*/ 4 "Budget" /*
	*/ 5 "Economic & Financial Affairs" /*
	*/ 6 "Justice & Home Affairs" /*
	*/ 7 "Agriculture" /*
	*/ 8 "Fisheries" /*
	*/ 9 "Internal Market & Industry" /*
	*/ 10 "Telecommunications" /*
	*/ 11 "Energy" /*
	*/ 12 "Research" /*
	*/ 13 "Employment & Soc. policy" /*
	*/ 14 "Transport" /*
	*/ 15 "Environment" /*
	*/ 16 "Health & Consumer policy" /*
	*/ 17 "Education, Youth & Culture", modify
label val policy policyl
tab policy, m

* Save dataset and exit
compress
save "Data management\worg-management01.dta", replace
log close
exit
